from typing import List
from crazy_functions.review_fns.data_sources.base_source import PaperMetadata
import re

class ReferenceFormatter:
    """Generic reference formatter that renders paper metadata as BibTeX.

    The public entry point is :meth:`create_document`; the underscore-prefixed
    methods are helpers that callers may also use individually.
    """

    # Single-pass escape table. Using ``str.translate`` guarantees each input
    # character is rewritten exactly once, so backslashes introduced by one
    # escape (e.g. ``\&``) are never re-escaped by a later rule.
    # Plain hyphens are intentionally left untouched: doubling them would turn
    # hyphenated words ("self-supervised") into en-dashes.
    _BIBTEX_ESCAPES = str.maketrans({
        '\\': '\\textbackslash{}',
        '&': '\\&',
        '%': '\\%',
        '$': '\\$',
        '#': '\\#',
        '_': '\\_',
        '{': '\\{',
        '}': '\\}',
        '~': '\\textasciitilde{}',
        '^': '\\textasciicircum{}',
        '<': '\\textless{}',
        '>': '\\textgreater{}',
        '"': '``',
        '—': '---',
    })

    # Words skipped when picking the title word for a cite key.
    _STOP_WORDS = frozenset(
        {'a', 'an', 'the', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
    )

    # Mapping from ``venue_type`` (lower-cased) to BibTeX entry type.
    _ENTRY_TYPES = {
        'conference': 'inproceedings',
        'preprint': 'unpublished',
        'journal': 'article',
        'book': 'book',
        'thesis': 'phdthesis',
    }

    _HEADER = (
        "% This file was automatically generated by GPT-Academic\n"
        "% Compatible with: EndNote, Zotero, JabRef, and LaTeX\n\n"
    )

    def __init__(self):
        pass

    def _sanitize_bibtex(self, text: str) -> str:
        """Escape characters that are special to LaTeX/BibTeX.

        Args:
            text: Raw field value; ``None`` or an empty string is accepted.

        Returns:
            The escaped text, or ``""`` when *text* is falsy.
        """
        if not text:
            return ""
        return text.translate(self._BIBTEX_ESCAPES)

    @staticmethod
    def _split_author(author: str) -> tuple:
        """Split an author name into ``(surname, given_names)``.

        Names already written as "Surname, Given" are split on the first
        comma. Otherwise the last whitespace-separated word is assumed to be
        the surname. A single-word name is returned as the surname with empty
        given names; a blank name yields ``("", "")``.
        """
        name = (author or "").strip()
        if ',' in name:
            surname, _, given = name.partition(',')
            return surname.strip(), given.strip()
        parts = name.split()
        if len(parts) > 1:
            return parts[-1], ' '.join(parts[:-1])
        return name, ""

    def _generate_cite_key(self, paper: "PaperMetadata") -> str:
        """Build a cite key of the form ``<surname><year><first title word>``.

        The surname comes from the first author, the year falls back to
        ``"0000"`` when missing, and the title word is the first word longer
        than two characters that is not a stop word. The result is lower-cased
        and reduced to ``[a-z0-9]``.
        """
        first_author = ""
        if paper.authors:
            # Blank author strings no longer raise IndexError.
            first_author = self._split_author(paper.authors[0])[0].lower()

        year = str(paper.year) if paper.year else "0000"

        title_word = ""
        if paper.title:
            for word in re.findall(r'\w+', paper.title.lower()):
                if len(word) > 2 and word not in self._STOP_WORDS:
                    title_word = word
                    break

        raw_key = f"{first_author}{year}{title_word}"
        # Keep only characters that are safe in a BibTeX key.
        return re.sub(r'[^a-z0-9]', '', raw_key.lower())

    def _get_entry_type(self, paper: "PaperMetadata") -> str:
        """Return the BibTeX entry type for *paper*.

        Uses the optional ``venue_type`` attribute; unknown or missing values
        default to ``'article'``.
        """
        venue_type = getattr(paper, 'venue_type', None)
        if venue_type:
            return self._ENTRY_TYPES.get(venue_type.lower(), 'article')
        return 'article'

    @staticmethod
    def _unique_key(base: str, used: set) -> str:
        """Return *base*, or *base* with a ``_N`` suffix if already in *used*.

        The returned key is added to *used*.
        """
        key = base
        counter = 1
        while key in used:
            counter += 1
            key = f"{base}_{counter}"
        used.add(key)
        return key

    def _format_authors(self, authors) -> str:
        """Render authors as a BibTeX ``"Surname, Given and ..."`` string.

        Blank author entries are skipped.
        """
        rendered = []
        for author in authors:
            surname, given = self._split_author(author)
            if not surname and not given:
                continue
            full = f"{surname}, {given}" if surname and given else (surname or given)
            rendered.append(self._sanitize_bibtex(full))
        return " and ".join(rendered)

    def _entry_fields(self, paper: "PaperMetadata", entry_type: str) -> list:
        """Collect the ``"  name = {value}"`` field lines for one entry."""
        fields = []

        if paper.title:
            fields.append(f"  title = {{{self._sanitize_bibtex(paper.title)}}}")

        if paper.authors:
            authors = self._format_authors(paper.authors)
            if authors:
                fields.append(f"  author = {{{authors}}}")

        if paper.year:
            fields.append(f"  year = {{{paper.year}}}")

        # Venue: prefer the structured ``venue_name`` when present, otherwise
        # fall back to the plain ``venue`` attribute.
        venue_name = getattr(paper, 'venue_name', None)
        if venue_name:
            if entry_type == 'inproceedings':
                fields.append(f"  booktitle = {{{self._sanitize_bibtex(venue_name)}}}")
            elif entry_type == 'article':
                fields.append(f"  journal = {{{self._sanitize_bibtex(venue_name)}}}")
                # ``venue_info`` may be absent or None; treat both as empty.
                venue_info = getattr(paper, 'venue_info', None) or {}
                for key in ('volume', 'number', 'pages'):
                    if key in venue_info:
                        value = venue_info[key]
                        if value is not None and value != "":
                            fields.append(f"  {key} = {{{value}}}")
        elif paper.venue:
            venue_field = "booktitle" if entry_type == "inproceedings" else "journal"
            fields.append(f"  {venue_field} = {{{self._sanitize_bibtex(paper.venue)}}}")

        if paper.doi:
            fields.append(f"  doi = {{{paper.doi}}}")

        if paper.url:
            fields.append(f"  url = {{{paper.url}}}")
        elif paper.doi:
            # No explicit URL: derive one from the DOI.
            fields.append(f"  url = {{https://doi.org/{paper.doi}}}")

        if paper.abstract:
            fields.append(f"  abstract = {{{self._sanitize_bibtex(paper.abstract)}}}")

        institutions = getattr(paper, 'institutions', None)
        if institutions:
            joined = " and ".join(self._sanitize_bibtex(inst) for inst in institutions)
            fields.append(f"  institution = {{{joined}}}")

        # Optional attributes: emit only when they carry a value, so a
        # present-but-None attribute does not produce "month = {None}".
        month = getattr(paper, 'month', None)
        if month:
            fields.append(f"  month = {{{month}}}")

        note = getattr(paper, 'note', None)
        if note:
            fields.append(f"  note = {{{self._sanitize_bibtex(note)}}}")

        return fields

    def create_document(self, papers: List["PaperMetadata"]) -> str:
        """Generate a BibTeX document for *papers*.

        Args:
            papers: Paper metadata objects. Each must expose ``title``,
                ``authors``, ``year``, ``venue``, ``doi``, ``url`` and
                ``abstract``; ``venue_type``, ``venue_name``, ``venue_info``,
                ``institutions``, ``month`` and ``note`` are optional.

        Returns:
            The BibTeX text: a two-line comment header followed by one entry
            per paper. Cite keys that would collide within the document get a
            ``_2``, ``_3``, ... suffix.
        """
        chunks = [self._HEADER]
        used_keys = set()

        for paper in papers:
            entry_type = self._get_entry_type(paper)
            cite_key = self._unique_key(self._generate_cite_key(paper), used_keys)
            fields = self._entry_fields(paper, entry_type)

            if fields:
                body = ",\n".join(fields)
                chunks.append(f"@{entry_type}{{{cite_key},\n{body}\n}}\n\n")
            else:
                chunks.append(f"@{entry_type}{{{cite_key}\n}}\n\n")

        return "".join(chunks)